clear;
rng(0)

%% Extract the regression coefficients of Healthy Controls (HC)
% Loads the struct array `subjectgene` and flattens each subject's
% coefficient matrix into one row of allBeta (column-major order).
clinicalState = {'HC_avg.mat'};
load(clinicalState{1});

numberOfGene = 976;
numberOfFactor = 6;

allSubjectID = {subjectgene.RID};
subjectBetas = {subjectgene.betasAvg};
subjectNumber = numel(subjectBetas);

% One row per subject, one column per retained coefficient.
allBeta = zeros(subjectNumber, numberOfGene*numberOfFactor^2);

for sub = 1:subjectNumber
    % The first 8 rows of betasAvg are skipped; presumably they hold
    % non-gene regressors -- TODO confirm against the regression script.
    geneBeta = subjectBetas{sub}(9:end,:);
    allBeta(sub,:) = geneBeta(:).';
end

%% Load the IDs of stable HC and pick the columns of coefficients whose 99% CI excludes zero
load ('postRegressSubjectID');
IdStableHC = agingID;
numberOfStable = numel(IdStableHC);

% Look each stable ID up in allSubjectID so that row k of reliableBeta belongs
% to IdStableHC(k). cogSlope is built in IdStableHC order, so the two matrices
% must share that order to be paired row by row.
[isFound, indexStable] = ismember(IdStableHC, cell2mat(allSubjectID));
if ~all(isFound(:))
    % A missing subject would leave reliableBeta with fewer rows than cogSlope.
    error('stableHC:missingSubject', ...
          '%d stable HC ID(s) have no regression coefficients in allSubjectID.', ...
          nnz(~isFound));
end

% Column-wise mean +/- 2.58 standard errors (normal-approximation 99% CI).
% The dimension is given explicitly so a single stable subject does not
% collapse the statistics to a scalar.
stableBeta = allBeta(indexStable,:);
betaMean = mean(stableBeta, 1);
ciHalfWidth = 2.58*std(stableBeta, 0, 1)/sqrt(numberOfStable);
lowCI = betaMean - ciHalfWidth;
highCI = betaMean + ciHalfWidth;

signLow = sign (lowCI);
signHigh = sign (highCI);

% Keep a column only when both bounds lie strictly on the same side of zero.
% Requiring a non-zero sign rejects columns whose interval is exactly [0, 0]
% (e.g. coefficients that are zero for every subject).
reliableBetaIndex = find (signLow == signHigh & signLow ~= 0);
reliableBeta = allBeta(indexStable,reliableBetaIndex);


%% Compute the slope of cognitive score on age

load('cognitiveScores');

numberOfStableHC = length (IdStableHC);

% One row per stable HC subject; the columns hold the fitted slopes of
% MMSE, ADAS11, ADAS13, MEM and EF (in that order). Preallocating also
% guarantees no stale rows survive when this section is re-run.
cogSlope = zeros(numberOfStableHC, 5);

for subj = 1:numberOfStableHC

    % Visits of this subject in the MMSE/ADAS records.
    visitMask1 = (IdCogScores == IdStableHC(subj));
    age1 = visitTimes(visitMask1);
    design1 = [ones(size(age1,1), 1) age1];   % intercept + visit time

    % Visits of this subject in the MEM/EF records (separate ID/time vectors).
    visitMask2 = (IdForEFandMEM == IdStableHC(subj));
    age2 = visitTimesForEFandMEM(visitMask2);
    design2 = [ones(size(age2,1), 1) age2];

    % Each regress call returns [intercept; slope] for one score.
    regressionCoef = [regress(MMSE(visitMask1), design1), ...
                      regress(ADAS11(visitMask1), design1), ...
                      regress(ADAS13(visitMask1), design1), ...
                      regress(MEM(visitMask2), design2), ...
                      regress(EF(visitMask2), design2)];

    % Only the slopes (second row) are kept.
    cogSlope(subj,:) = regressionCoef(2,:);
end


%% Compute singular value decomposition between gene-imaging coefficients and slope of cognitive score
% SVD_multivariate_analysis is defined outside this file. Of its outputs, U,
% singVal and newV are reused by the permutation and bootstrap sections below.
% NOTE(review): the last argument (5) is presumably the number of components
% to keep -- confirm against the function's own documentation.

[PC_pcntvar,R,Xcontr_on_PCs,XExpected_contr,XExpected_contr_PCs,V,U,Vs,Us,X_order, singVal, newV] = SVD_multivariate_analysis(cogSlope,reliableBeta,[], [],5);

%% Perform permutation analysis
% Each pass shuffles the rows of cogSlope (breaking the subject pairing with
% reliableBeta), repeats the decomposition, rotates the permuted solution onto
% the observed one with procrustes, and stores the normalised column norms.
[numberOfSubject, numberOfFeature] = size (reliableBeta);
numberOfPerm = 10000;

X = reliableBeta;   % identical for every permutation, so assigned once

for n=1:numberOfPerm
    fprintf('Permuation number %d ', n);
    permutation = randperm (numberOfSubject);
    Y = cogSlope(permutation,:);
    [pcntvar_perm,~,contr_perm,expect_perm,~,~,U_perm,~,~,~, singVal_new, newV_perm]  = SVD_multivariate_analysis(Y,X,[], [], 5);

    % rotated.T is the rotation matrix of the procrustes transform.
    [d, z, rotated] = procrustes(newV',newV_perm');
    V_perm = newV_perm * singVal_new * rotated.T;
    V_perm= sqrt(sum(V_perm.^2));

    % Allocate the result matrix once its width is known instead of growing
    % it on every one of the numberOfPerm iterations.
    if n == 1
        singVal_perm = zeros(numberOfPerm, numel(V_perm));
    end

    % Scale the column norms so that each row of singVal_perm sums to one.
    componentNorm = V_perm(:);
    singVal_perm(n,:) = componentNorm/norm(componentNorm,1);

end

% Fraction of permutations whose value exceeds the observed one, per component.
% NOTE(review): the permuted values are normalised to sum to one, while
% diag(singVal) is used as returned -- confirm both are on the same scale.
% NOTE(review): this estimator can return exactly 0; (1+count)/(1+numberOfPerm)
% is the usual conservative alternative.
p_values = mean(singVal_perm>diag(singVal)');

%% Perform bootstrapping of the first PC (significant)
numberOfBoot = 10000;

pcntvar_boot = zeros(numberOfBoot, 1);
U_boot = zeros(numberOfBoot,numberOfFeature);
U_boot_flip = zeros(numberOfBoot,numberOfFeature);

% From here on U is only the first row of the observed U.
% NOTE(review): this assumes the rows of U are components -- confirm.
U = U(1,:);
signOfU = sign(U);

for n=1:numberOfBoot
    sprintf ('bootstrap number: %d',n)

    % Draw subjects with replacement; the same rows are used for X and Y
    % so that each resampled subject keeps its own cognitive slopes.
    bootRows = datasample(1:numberOfStable, numberOfStable);
    Y = cogSlope(bootRows,:);
    X = reliableBeta(bootRows,:);

    [pcntvar_boot(n),~,contr_boot,expect_boot,~,~,U_boot(n,:),~,~,~, ~, ~]  = SVD_multivariate_analysis(Y,X,[], [],1);

    % Give every bootstrap weight the sign of the observed weight.
    U_boot_flip(n,:) = signOfU.*abs(U_boot(n,:));
end

%% Evaluate bootstrap ratio and select top genes
% Bootstrap ratio = |observed weight| / bootstrap standard error.
StdError = std(U_boot_flip);
bootRatio = abs(U./StdError);
significantBetaIndex = find (bootRatio>2.58);

% Map positions within reliableBeta back to column indices of allBeta.
significantBetaIndexActual = reliableBetaIndex(significantBetaIndex);

% Indicator over every coefficient column; the length uses numberOfFactor^2,
% the same expression that sizes allBeta, rather than a hard-coded 6.
geneContribution = zeros (numberOfGene*numberOfFactor^2,1);
geneContribution(significantBetaIndexActual) = 1;

%% Extract the names of top genes and the factors they modulate
load('landmarkGenesInfo');
geneName = landmarkGenesInfo.gene_names;

% Reordering that turns a factor-major block (all genes for factor 1, then all
% genes for factor 2, ...) into gene-major order, so that the numberOfFactor
% entries of each gene become consecutive: [1, G+1, 2G+1, ..., 2, G+2, ...].
indices = reshape(reshape(1:numberOfGene*numberOfFactor, numberOfGene, numberOfFactor)', 1, []);

blockLength = numberOfGene*numberOfFactor;

% geneImaging is a 1-by-numberOfFactor struct array; element `fac` describes
% one longitudinal biological factor (1=CBF, 2=amyloid, 3=functional
% activity, 4=metabolism, 5=grey matter density, 6=tau).
geneImaging = struct('interact', cell(1, numberOfFactor));

for fac= 1:numberOfFactor
    % Slice out this factor's block of the indicator and reorder it gene-major.
    geneFactor = geneContribution(blockLength*(fac-1)+1:blockLength*fac);
    geneFactor = geneFactor(indices);
    rowIndex = find(geneFactor);

    % Position p in the gene-major block belongs to gene ceil(p/numberOfFactor)
    % and to factor slot mod(p-1, numberOfFactor)+1 (1..numberOfFactor).
    mergedRow = ceil(rowIndex/numberOfFactor);
    mergedFactor = mod (rowIndex-1, numberOfFactor) + 1;
    geneRow = unique(mergedRow);

    % One row per selected gene: {gene name, factor slots flagged for it}.
    geneInteraction = cell(numel(geneRow), 2);
    for g = 1:numel(geneRow)
        geneIndex = geneRow(g);
        geneInteraction(g,:) = {geneName(geneIndex), mergedFactor(mergedRow == geneIndex)};
    end

    % In each element, the gene and the factor directly modulated can be found.
    geneImaging(fac).interact = geneInteraction;
end


%% Evaluate total contributions of each top gene on PC1
% Restrict the observed weights and their bootstrap variances to the
% significant coefficients.
U_significant = U(significantBetaIndex);
varianceBoot = var (U_boot_flip);
significantVar = varianceBoot(significantBetaIndex);

% Column index -> gene index in 1..numberOfGene (a remainder of 0 is the last gene).
significantGene = mod (significantBetaIndexActual, numberOfGene);
significantGene(significantGene==0) = numberOfGene;

% Genes in order of first appearance.
uniqueGene = unique (significantGene, 'stable');
geneName = geneName(uniqueGene);

numberOfTopGene = numel(uniqueGene);
U_final = zeros(numberOfTopGene,1);
stdError_final = zeros (numberOfTopGene,1);

for g = 1:numberOfTopGene
    members = find (significantGene==uniqueGene(g));
    if isscalar(members)
        % A gene with a single significant coefficient keeps its signed weight.
        U_final (g) = U_significant(members);
    else
        % Several coefficients of one gene are combined as a sum of magnitudes.
        % NOTE(review): signed vs. absolute treatment differs between the two
        % branches -- confirm this asymmetry is intended.
        U_final (g) = sum (abs(U_significant(members)));
    end
    % Variances add; the combined standard error is the root of their sum.
    stdError_final (g) = sqrt (sum(significantVar(members)));
end


